####################protein interaction-based gene optimization method####################

##方式2：基于蛋白质互作（将互作的蛋白质数量作为频分）########
##Method 2: Based on protein interaction (each gene is scored by the number of proteins it interacts with)########
# Score candidate genes by their protein-protein interactions with a set of
# training genes.
#
# For every gene in `test.genes` the score is
#   (distinct training genes found in its direct interaction pairs)
#   + 0.5 * (distinct training genes that interact with one of its
#            "bridge" partners, i.e. partners that are neither the gene
#            itself nor a training gene).
#
# Arguments:
#   interaction  Table with columns OFFICIAL_SYMBOL_A and OFFICIAL_SYMBOL_B,
#                one row per interaction. When it is missing, NULL, or cannot
#                be evaluated, the object `ppi_all` stored in ./ppi_all.RData
#                is used instead.
#   train.genes  Vector of training gene symbols.
#   test.genes   Vector of candidate gene symbols to score.
#
# Returns:
#   A data.frame with one row per element of `test.genes`, sorted by
#   decreasing score, with columns Symbol, score_interaction and rank
#   (1 = highest score; tied genes keep their order in `test.genes`).
fun.base.interaction <- function(interaction, train.genes, test.genes) {
  # 1. Resolve the interaction table.
  # The argument used to be overwritten unconditionally, so existing callers
  # may pass a name that does not exist in their scope; tolerate that and fall
  # back to the bundled file instead of failing.
  ppi <- NULL
  if (!missing(interaction)) {
    ppi <- tryCatch(
      interaction,
      error = function(e) {
        warning(
          "could not evaluate `interaction` (", conditionMessage(e),
          "); falling back to ppi_all.RData",
          call. = FALSE
        )
        NULL
      }
    )
  }
  if (is.null(ppi)) {
    # Load into a private environment so objects stored in the file cannot
    # overwrite the function's own arguments.
    store <- new.env()
    load(file.path(".", "ppi_all.RData"), envir = store)
    if (!exists("ppi_all", envir = store, inherits = FALSE)) {
      stop("ppi_all.RData does not contain an object named `ppi_all`",
           call. = FALSE)
    }
    ppi <- get("ppi_all", envir = store, inherits = FALSE)
  }
  absent <- setdiff(c("OFFICIAL_SYMBOL_A", "OFFICIAL_SYMBOL_B"), names(ppi))
  if (length(absent) > 0) {
    stop("`interaction` is missing column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # 2. Work on plain character vectors; `[[` behaves the same for data.frame,
  #    tibble and data.table inputs.
  sym.a <- as.character(ppi[["OFFICIAL_SYMBOL_A"]])
  sym.b <- as.character(ppi[["OFFICIAL_SYMBOL_B"]])
  train.genes <- as.character(train.genes)
  test.genes <- as.character(test.genes)

  # Membership in the training set does not depend on the candidate gene.
  a.train <- sym.a %in% train.genes
  b.train <- sym.b %in% train.genes

  # 3. Score a single candidate gene.
  score.gene <- function(gene) {
    a.hit <- sym.a %in% gene
    b.hit <- sym.b %in% gene

    # 3.1 One-step interaction: pairs linking the candidate to a training gene.
    # Both columns are combined before de-duplication, so a training partner
    # is counted regardless of which column it sits in.
    # NOTE(review): a candidate that is itself in `train.genes` is counted
    # among its own direct hits; confirm whether it should be excluded.
    direct <- (a.hit & b.train) | (a.train & b.hit)
    direct.genes <- unique(c(sym.a[direct], sym.b[direct]))
    n.direct <- sum(direct.genes %in% train.genes)

    # 3.2 Indirect interaction: partners of the candidate that are neither the
    # candidate nor a training gene act as bridges to training genes.
    touching <- a.hit | b.hit
    bridges <- unique(c(sym.a[touching], sym.b[touching]))
    bridges <- bridges[!(bridges %in% c(gene, train.genes))]

    a.bridge <- sym.a %in% bridges
    b.bridge <- sym.b %in% bridges
    indirect <- (a.train & b.bridge) | (a.bridge & b.train)
    indirect.genes <- unique(c(sym.a[indirect], sym.b[indirect]))
    n.indirect <- sum(indirect.genes %in% train.genes)

    # Indirect hits carry half the weight of direct ones.
    n.direct + 0.5 * n.indirect
  }
  scores <- vapply(test.genes, score.gene, numeric(1), USE.NAMES = FALSE)

  # 4. Assemble, sort by decreasing score and rank by position.
  scored <- data.frame(
    Symbol = test.genes,
    score_interaction = scores,
    stringsAsFactors = FALSE
  )
  scored <- scored[order(scored$score_interaction, decreasing = TRUE), ,
                   drop = FALSE]
  scored$rank <- seq_len(nrow(scored))
  rownames(scored) <- NULL
  scored
}

#### 2.2 IID database ####
# Read the gene table and add `order`, the rank of each gene's P.value
# (missing P-values are ranked last).
all_result <- fread(file.path(".", "RA_gene_gold_matrix.txt"), sep = "\t") %>%
  mutate(order = rank(P.value, na.last = TRUE))
# The gene in the first row is held out of the training set.
defector <- all_result$Symbol[1]
# Training set: genes with P.value <= 0.05, excluding the held-out gene.
train.symbol <- all_result[
  which(all_result$P.value <= 0.05 & all_result$Symbol != defector),
] %>%
  dplyr::select(Symbol)
# Candidates: the held-out gene plus up to 99 randomly drawn other genes.
# The draw is capped so that a table with fewer than 99 other genes does not
# abort with "cannot take a sample larger than the population".
other.symbols <- all_result$Symbol[which(all_result$Symbol != defector)]
n.decoys <- min(99L, length(other.symbols))
if (n.decoys < 99L) {
  warning("only ", n.decoys, " other genes available; sampling all of them",
          call. = FALSE)
}
candidate.symbol <- c(defector, sample(other.symbols, n.decoys))
# Score every candidate against the training genes.
result_input <- fun.base.interaction(ppi_all, train.symbol$Symbol, candidate.symbol)



